
* Load the raw survey export and cut it down to one finished row per respondent id.
clear all
insheet using "$data/raw_data/survey2.csv", names clear

* The first two data rows are discarded before anything else happens
* (presumably extra header rows in the export -- TODO confirm against the raw file).
drop in 1/2

* Remove rows with an empty respondent id; for repeated ids only the first row survives.
drop if rid==""
duplicates drop rid, force

* Turn every string column that contains only numeric text into a numeric variable.
destring *, replace

* Only rows with finish equal to 1 stay in the sample.
drop if finish!=1



***********************
**** Add case data ****
***********************

* casesmarch16: hard-coded case count attached to each respondent by state code.
* Any observation whose state code is not listed below keeps the initial value 0.
* NOTE(review): this includes respondents with a missing state code -- confirm that 0 is intended for them.
gen casesmarch16 = 0
replace casesmarch16=29 if state==1
replace casesmarch16=1 if state==2
replace casesmarch16=18 if state==3
replace casesmarch16=3 if state==4
replace casesmarch16=557 if state==5
replace casesmarch16=132 if state==6
replace casesmarch16=41 if state==7
replace casesmarch16=8 if state==8
replace casesmarch16=22 if state==9
replace casesmarch16=101 if state==10
replace casesmarch16=121 if state==11
replace casesmarch16=8 if state==12
replace casesmarch16=4 if state==13
replace casesmarch16=83 if state==14
replace casesmarch16=25 if state==15
replace casesmarch16=23 if state==16
replace casesmarch16=11 if state==17
replace casesmarch16=21 if state==18
replace casesmarch16=115 if state==19
replace casesmarch16=17 if state==20
replace casesmarch16=42 if state==21
replace casesmarch16=187 if state==22
replace casesmarch16=54 if state==23
replace casesmarch16=53 if state==24
replace casesmarch16=12 if state==25
replace casesmarch16=6 if state==26
replace casesmarch16=6 if state==27
replace casesmarch16=18 if state==28
replace casesmarch16=45 if state==29
replace casesmarch16=17 if state==30
replace casesmarch16=176 if state==31
replace casesmarch16=21 if state==32
replace casesmarch16=967 if state==33
replace casesmarch16=39 if state==34
replace casesmarch16=1 if state==35
replace casesmarch16=50 if state==36
replace casesmarch16=10 if state==37
replace casesmarch16=39 if state==38
replace casesmarch16=79 if state==39
* State code 40 has no case count available, so it is deliberately left missing.
replace casesmarch16=. if state==40
replace casesmarch16=21 if state==41
replace casesmarch16=30 if state==42
replace casesmarch16=10 if state==43
replace casesmarch16=52 if state==44
replace casesmarch16=85 if state==45
replace casesmarch16=28 if state==46
replace casesmarch16=12 if state==47
replace casesmarch16=49 if state==48
replace casesmarch16=779 if state==49
replace casesmarch16=0 if state==50
replace casesmarch16=48 if state==51
replace casesmarch16=2 if state==52


* Indicator for at least one case in the respondent's state.
* Stata treats missing as larger than any number, so an unguarded "casesmarch16>0"
* would code the missing count (state 40) as 1; keep the indicator missing instead.
gen anycase_march16 = casesmarch16>0 if !missing(casesmarch16)





********************************************************************************
* Clean demographics: gender and age-band indicators.
********************************************************************************

* Codes 2 and 3 of male are both mapped to 0; every other value is left as it is.
recode male (2 3 = 0)
label variable male "Male"

* One 0/1 indicator per age code 1-6, in the order of the suffixes below.
* The indicator is left missing when age is ".".
local agecode = 0
foreach band in 18_24 25_34 35_44 45_54 55_64 65 {
    local ++agecode
    generate age_`band' = age == `agecode' if age != .
}

label variable age_18_24 "Aged 18 to 24"
label variable age_25_34 "Aged 25 to 34"
label variable age_35_44 "Aged 35 to 44"
label variable age_45_54 "Aged 45 to 54"
label variable age_55_64 "Aged 55 to 64"
label variable age_65 "Aged 65 above"

* Employment-status indicators (missing when emply is ".").
generate empl_ft = emply == 1 if emply != .
generate empl_pt = emply == 2 if emply != .
generate empl_retired = emply == 5 if emply != .
* NOTE(review): empl_student uses emply code 7 while student (below) uses code 9 -- confirm which code is the student category.
generate empl_student = emply == 7 if emply != .


* Income-bracket indicators for incomedem codes 1-8 (missing when incomedem is ".").
local bracket = 0
foreach band in 15 15_25 25_50 50_75 75_100 100_150 150_200 200 {
    local ++bracket
    generate inc_`band' = incomedem == `bracket' if incomedem != .
}
* Code 9 flags respondents without a usable income bracket; this flag is 0 (not missing) when incomedem is ".".
generate inc_miss = incomedem == 9

* Numeric income: one dollar amount per bracket code 1-8, in bracket order.
generate inc = .
local bracket = 0
foreach dollars in 10000 20000 37500 62500 87500 125000 175000 250000 {
    local ++bracket
    replace inc = `dollars' if incomedem == `bracket'
}

* Respondents flagged by inc_miss receive the sample mean of the assigned amounts.
summarize inc, detail
replace inc = r(mean) if inc_miss == 1


generate log_inc = log(inc)
generate log_inc_sq = (log_inc)^2

label variable log_inc "Log income"
label variable log_inc_sq "Log income squared"

* Labour-market status indicators (missing when emply is ".").
generate unemp = emply == 4 if emply != .
label variable unemp "Unemployed"
generate student = emply == 9 if emply != .
label variable student "Student"
generate working = inlist(emply, 1, 2, 3) if emply != .
label variable working "Working full or part time"

* Education indicators derived from educ; all are left missing when educ is ".".
gen below_highschool=educ==1 if educ!=.
gen highschool=educ==2 if educ!=.
label var highschool "Highest degree: highschool"
gen college=educ>2 if educ!=.
label var college "Some post secondary education"

* Stata treats missing as larger than any number, so "educ>=5" without a guard
* would code respondents with missing educ as degree holders; guard it like the indicators above.
gen college_degree =educ>=5 if educ!=.
label var college_degree "Has college degree"


* Party-identification indicators derived from pol (missing when pol is ".").
gen democrat = pol==2 if pol!=.
gen republican = pol==1 if pol!=.
gen independent = pol==3 if pol!=.
gen pol_other = pol==4 if pol!=.






***** Manipulation checks **********
* The manip_* items are renamed to impact_* and their raw codes 18-22 are mapped onto 1-5.
rename manip_* impact_*
label variable impact_1 "The World will be severely affected by the coronavirus."
label variable impact_2 "The US will be severely affected by the coronavirus."
recode impact_1 impact_2 (18=1) (19=2) (20=3) (21=4) (22=5) // High is pessimistic

label define impact 1 "Strongly  disagree" 2 "Disagree" 3 "Neither agree nor disagree" 4 "Agree" 5 "Strongly agree"
label values impact_1 impact_2 impact

* Worry items: the 1-4 scale is reversed so that larger values mean more worry.
label variable worry_corona_econ "Are you worried about the effects of the coronavirus on the US economy?"
rename worry_econpersonal worry_perecon
label variable worry_perecon "Are you worried about the effects of the coronavirus on your household's economic situation?"

recode worry_corona_econ worry_perecon (4=1) (3=2) (2=3) (1=4) // High is worried

label define worry 1 "Not at all worried" 2 "Not worried" 3 "Worried" 4 "Very worried"
label values worry_corona_econ worry_perecon worry

* Standardized (z-score) version of each of the four items, stored as z_<item>.
foreach item in impact_1 impact_2 worry_corona_econ worry_perecon {
    egen z_`item' = std(`item')
}

*** Exponential growth ***
label var day5 "Number of cases after 5 days"
label var day10 "Number of cases after 10 days"
label var day20 "Number of cases after 20 days"


* For each answer build two derived variables:
*   <var>_w  : copy of the answer, capped from above at its 95th percentile
*   ln<var>  : natural log of the raw (uncapped) answer
foreach x in day5 day10 day20 doublingtime {
sum `x' , d
gen `x'_w = `x'
* Stata treats missing as larger than any number, so without the !missing() guard
* every missing answer would be overwritten with the 95th percentile.
replace `x'_w = `r(p95)' if  `x'>`r(p95)' & !missing(`x')

gen ln`x'=ln(`x')

}

label var day5_w "Number of cases after 5 days (w)"
label var day10_w "Number of cases after 10 days  (w)"
label var day20_w "Number of cases after 20 days  (w)"

label var lnday5 "Log number of cases after 5 days"
label var lnday10 "Log number of cases after 10 days"
label var lnday20 "Log number of cases after 20 days"



* Final variable labels; these replace any label given to the same variable earlier in the script.

* Demographics
label variable male "Male"
label variable age "Age (categorical)"
label variable incomedem "Income"

* Education and labour-market status
label variable highschool "Highschool education"
label variable college "College eductation"
label variable working "Currently working"
label variable unemp "Unemployed"
label variable student "Student"

* Party identification
label variable democrat "Democrat"
label variable republican "Republican"

* Beliefs about the disease; the misspelled raw variable name is corrected before labelling.
rename perceived_motailty perceived_mortality
label variable perceived_mortality "Predicted  mortality rate"
label variable perceived_contagion "Infectiousness (R_0)"


* Write the cleaned survey to disk so it can serve as the using file in the merge below.
save "$data/survey2_clean.dta", replace


*** Add classification of mental model ***
* finalcluster.csv has to exist already: it is generated by running blocks 117-119 of the Python code.

import delimited "$data/finalcluster.csv", varnames(1) stripquote(yes) clear
* Keep only classified respondents, and only the merge key plus the cluster index.
drop if missing(indexcluster)
keep responseid indexcluster
* Keep matched respondents only (those present in both files) and do not leave a _merge variable behind.
merge 1:1 responseid using "$data/survey2_clean.dta", keep(match) nogenerate


* Indicators for the three cluster codes found in indexcluster (0, 1, 2).
generate exponential = indexcluster == 0
generate linear = indexcluster == 1
generate other = indexcluster == 2

label variable exponential "Exponential mental model of disease spread"
label variable linear "Linear mental model of disease spread"
label variable other "Other mental model of disease spread"

* Restrict the file to the analysis variables, grouped here by topic.
local growth_vars   *day5* *day10* *day20*
local cluster_vars  responseid indexcluster exponential linear other
local demo_vars     male age age_25_34 age_35_44 age_45_54 age_55_64 age_65 inc log_inc log_inc_sq state
local belief_vars   perceived_contagion perceived_mortality trust_science
local status_vars   highschool college* unemp working student democrat republican
local outcome_vars  *worry_corona_econ *worry_perecon *impact_1* *impact_2* anycase_march16

keep `growth_vars' `cluster_vars' `demo_vars' `belief_vars' `status_vars' `outcome_vars'

* Overwrite the earlier cleaned file with the merged, trimmed version.
save "$data/survey2_clean.dta", replace






